home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_200
/
236_01
/
bawk.c
< prev
next >
Wrap
Text File
|
1989-06-05
|
13KB
|
636 lines
/*
HEADER: CUG236;
TITLE: BAWK Text Pattern/Action Tool (Brod86);
DATE: 05/17/1987;
DESCRIPTION: "BAWK scans text files for regular expression patterns
and executes a user-defined action (C code fragment)
for each specified pattern found. Descended from the
DECUS version of the same program.";
VERSION: 1.1;
KEYWORDS: Text Filter;
FILENAME: BAWK.C;
SEE-ALSO: BAWK.H, BAWK.DOC, BAWKACT.C, BAWKDO.C, BAWKPAT.C,
BAWKSYM.C;
COMPILERS: vanilla;
AUTHORS: W. C. Colley III, B. Brodt;
WARNINGS: "Program runs out of memory under CP/M Eco-C and dies
without a trace. Aztec C II is OK. Won't compile
yet under ECO-C88 -- module bawkdo.c gives a strange
code generator error. REQUIRES that sizeof(int) ==
sizeof(char *)!!!!! Therefore, the MSDOS small model
is OK, but the MSDOS large model is no good.";
*/
/*
* Bawk main program
*/
#define MAIN 1
#include <stdio.h>
#include "bawk.h"
/*
 * NOTE(review): _STACK is not referenced anywhere in the visible part of
 * this file.  Presumably it is a compiler run-time hook that requests the
 * stack size -- confirm against the target compiler's documentation.
 */
unsigned _STACK = 10000;
/*
 * Functions local to this module.  main() below calls all four of them;
 * none of them returns a value.
 */
void compile(), newfile(), process(), usage();
/*
* Main program
*/
int main( argc, argv )
int argc;
char **argv;
{
	/*
	 * Command line driver.
	 *
	 * Arguments beginning with '-' are options.  The first remaining
	 * argument names the rules file, which is compiled; every later one
	 * names a text file, which is processed.  A lone "-" stands for
	 * standard input in either position.  If no text file is named at
	 * all, standard input is processed.
	 *
	 * The BEGIN action (if one was compiled) runs once, before the first
	 * text file is processed; the END action (if any) runs once, after
	 * the last.  Always returns 0.
	 */
	char gotrules, didfile, getstdin;
	char *arg;

	gotrules = 0;
	didfile = 0;

	/*
	 * Initialize global variables:
	 */
	Stackptr = Stackbtm - 1;
	Stacktop = Stackbtm + MAXSTACKSZ;
	Nextvar = Vartab;
	strcpy( Fldsep, " \t" );
	strcpy( Rcrdsep, "\n" );

	/*
	 * Parse command line.  The "> 0" test matters: argc is allowed to
	 * be 0, and a bare "--argc" would then go negative, keep the loop
	 * running and walk off the end of argv.
	 */
	while ( --argc > 0 )
	{
		arg = *++argv;
		getstdin = 0;

		if ( *arg == '-' )
		{
			/*
			 * Process dash options.
			 */
			switch ( *++arg )
			{
#ifdef DEBUG
			case 'D':
			case 'd':
				++Debug;
				continue;
#endif
			case 0:
				/* "-" by itself: treat as a file argument meaning stdin */
				getstdin = 1;
				break;
			default:
				usage();
				continue;
			}
		}

		if ( !gotrules )
		{
			/*
			 * First file name argument on command line
			 * is assumed to be a rules file - attempt to
			 * compile it.
			 */
			if ( getstdin )
				newfile( (char *)0 );
			else
				newfile( arg );
			compile();
			gotrules = 1;
			continue;
		}

		/*
		 * Already read rules file - assume this is a text file
		 * for processing.  didfile is a plain flag rather than a
		 * counter so that a long argument list cannot wrap it
		 * around and run the BEGIN action a second time.
		 */
		if ( !didfile )
		{
			didfile = 1;
			if ( Beginact )
				doaction( Beginact );
		}
		/*
		 * newfile() is declared without a prototype, so the null
		 * argument is passed as a real pointer, matching the
		 * char * passed for named files.
		 */
		if ( getstdin )
			newfile( (char *)0 );
		else
			newfile( arg );
		process();
	}

	if ( !gotrules )
		usage();

	if ( !didfile )
	{
		/*
		 * Didn't process any files yet - process stdin.
		 */
		newfile( (char *)0 );
		if ( Beginact )
			doaction( Beginact );
		process();
	}

	if ( Endact )
		doaction( Endact );
	return 0;
}
/*
* Regular expression/action file compilation routines.
*/
void compile()
{
	/*
	 * Read the rules text from the current input file "Fileptr" and
	 * build the chain of RULE structures headed by "Rules", together
	 * with the special "Beginact" and "Endact" action buffers.
	 *
	 * Each item is first compiled into the global work buffer
	 * "Workbuf"; the compile routine's return value is used as the
	 * number of bytes to allocate and copy into permanent storage.
	 * The input file is closed with endfile() when input runs out.
	 */
	int ch, nbytes;

#ifdef DEBUG
	if ( Debug )
		error( "compiling...", 0 );
#endif
	for ( ;; )
	{
		ch = getcharacter();
		if ( ch == -1 )
			break;

		switch ( ch )
		{
		case ' ':
		case '\t':
		case '\n':
			/* white space between items is ignored */
			break;

		case '#':
			/*
			 * Comment - discard everything up to the end of
			 * the line (or the end of the input).
			 */
			do
				ch = getcharacter();
			while ( ch != -1 && ch != '\n' );
			break;

		case '{':
#ifdef DEBUG
			if ( Debug ) {
				printf ("got curly brace\n");
				error( "action", 0 );
			}
#endif
			/*
			 * An action.  Push the brace back so that
			 * act_compile() sees the whole action, then
			 * tokenize it into the work buffer.
			 */
			ungetcharacter( ch );
			nbytes = act_compile( Workbuf );
			if ( Rulep && Rulep->action )
			{
				/*
				 * Current rule already owns an action -
				 * this one starts a new rule.
				 */
				Rulep = Rulep->nextrule =
					(RULE *)getmem(sizeof(RULE));
				fillmem( Rulep, sizeof(RULE), 0 );
			}
			if ( !Rulep )
			{
				/*
				 * Nothing compiled so far - this becomes
				 * the head of the rule chain.
				 */
				Rulep = (RULE *)getmem(sizeof(RULE));
				Rules = Rulep;
				fillmem( Rulep, sizeof(RULE), 0 );
			}
			Rulep->action = getmem( nbytes );
			movemem( Workbuf, Rulep->action, nbytes );
			break;

		case ',':
#ifdef DEBUG
			if ( Debug )
				error( "stop pattern", 0 );
#endif
			/*
			 * The comma (already consumed) introduces the
			 * second half of a two-part pattern; it is only
			 * legal after a start pattern and only once.
			 */
			if ( !( Rulep && Rulep->pattern.start ) )
				error( "stop pattern without a start",
					RE_ERROR );
			if ( Rulep->pattern.stop )
				error( "already have a stop pattern",
					RE_ERROR );
			nbytes = pat_compile( Workbuf );
			Rulep->pattern.stop = getmem( nbytes );
			movemem( Workbuf, Rulep->pattern.stop, nbytes );
			break;

		default:
			/*
			 * Anything else is taken to begin a regular
			 * expression pattern.
			 */
#ifdef DEBUG
			if ( Debug )
				error( "start pattern", 0 );
#endif
			ungetcharacter( ch );
			nbytes = pat_compile( Workbuf );
			if ( *Workbuf == T_BEGIN )
			{
				/*
				 * "BEGIN" keyword - the action that
				 * follows goes into "Beginact".
				 */
				nbytes = act_compile( Workbuf );
				Beginact = getmem( nbytes );
				movemem( Workbuf, Beginact, nbytes );
			}
			else if ( *Workbuf == T_END )
			{
				/*
				 * "END" keyword - the action that
				 * follows goes into "Endact".
				 */
				nbytes = act_compile( Workbuf );
				Endact = getmem( nbytes );
				movemem( Workbuf, Endact, nbytes );
			}
			else
			{
				if ( Rulep )
				{
					/*
					 * A rule already exists - chain
					 * a fresh one after it.
					 */
					Rulep = Rulep->nextrule =
						(RULE *)getmem(sizeof(RULE));
					fillmem( Rulep, sizeof(RULE), 0 );
				}
				if ( !Rulep )
				{
					/*
					 * Very first rule - it becomes
					 * the head of the chain.
					 */
					Rulep = (RULE *)getmem(sizeof(RULE));
					Rules = Rulep;
					fillmem( Rulep, sizeof(RULE), 0 );
				}
				if ( Rulep->pattern.start )
					error( "already have a start pattern",
						RE_ERROR );
				Rulep->pattern.start = getmem( nbytes );
				movemem( Workbuf, Rulep->pattern.start, nbytes );
			}
			break;
		}
	}
	endfile();
}
/*
* Text file main processing loop.
*/
void process()
{
	/*
	 * Read a line at a time from current input file at "Fileptr",
	 * then apply each rule in the Rules chain to the input line.
	 *
	 * A rule with no start pattern fires on every line.  A rule with
	 * only a start pattern fires on each line that matches it.  A rule
	 * with both start and stop patterns fires on the line matching the
	 * start pattern and on every following line up to and including
	 * the one that matches the stop pattern.
	 *
	 * The rule chain may be empty (for example when the rules file
	 * held only BEGIN/END actions); input is then read and discarded.
	 */
#ifdef DEBUG
	int i;

	if ( Debug )
		error( "processing...", 0 );
#endif
	Recordcount = 0;
	while ( getline() )
	{
		/*
		 * Parse the input line.
		 */
		Fieldcount = parse( Linebuf, Fields, Fldsep );
#ifdef DEBUG
		if ( Debug>1 )
		{
			printf( "parsed %d words:\n", Fieldcount );
			for(i=0; i<Fieldcount; ++i )
				printf( "<%s>\n", Fields[i] );
		}
#endif
		/*
		 * Walk the chain with the global Rulep as the cursor.
		 * The pointer is tested before it is used, so an empty
		 * chain (Rules is null) is skipped instead of being
		 * dereferenced.
		 */
		for ( Rulep = Rules; Rulep; Rulep = Rulep->nextrule )
		{
			if ( ! Rulep->pattern.start )
			{
				/*
				 * No pattern given - perform action on
				 * every input line.
				 */
				doaction( Rulep->action );
			}
			else if ( Rulep->pattern.startseen )
			{
				/*
				 * Start pattern already found - perform
				 * action then check if line matches
				 * stop pattern.
				 */
				doaction( Rulep->action );
				if ( dopattern( Rulep->pattern.stop ) )
					Rulep->pattern.startseen = 0;
			}
			else if ( dopattern( Rulep->pattern.start ) )
			{
				/*
				 * Matched start pattern - perform action.
				 * If a stop pattern was given, set "start
				 * pattern seen" flag and process every input
				 * line until stop pattern found.
				 */
				doaction( Rulep->action );
				if ( Rulep->pattern.stop )
					Rulep->pattern.startseen = 1;
			}
		}
		/*
		 * Release memory allocated by parse().
		 */
		while ( Fieldcount )
			free( Fields[ --Fieldcount ] );
	}
}
/*
* Miscellaneous functions
*/
int parse( str, wrdlst, delim )
char *str;
char *wrdlst[];
char *delim;
{
/*
* Parse the string of words in "str" into the word list at "wrdlst".
* A "word" is a sequence of characters delimited by one or more
* of the characters found in the string "delim".
* Returns the number of words parsed.
* CAUTION: the memory for the words in "wrd